{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "780abad8-da1f-485c-ae28-62a057b3164f",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "test 1 originaltext word total: 26\n",
      "test 1 originaltext: [('knowledge', 2), ('work', 2), ('Even', 1), ('limited', 1), ('amount', 1), ('areas', 1), ('allow', 1), ('important', 1), ('impressive', 1), ('like', 1), ('building', 1), ('websites', 1), ('extending', 1), ('functionality', 1), ('existing', 1), ('tools', 1), ('without', 1), ('deeper', 1), ('solving', 1), ('complex', 1), ('problems', 1), ('becomes', 1), ('difficult', 1), ('impossible', 1)]\n",
      "test 1 faketext word total: 75\n",
      "test 1 faketext: [('may', 4), ('digital', 3), ('principles', 2), ('potential', 2), ('librarians', 2), ('coding', 2), ('possess', 1), ('technical', 1), ('skills', 1), ('manipulate', 1), ('code', 1), ('create', 1), ('functional', 1), ('websites', 1), ('applications', 1), ('comprehension', 1), ('underlying', 1), ('risks', 1), ('involved', 1), ('limited', 1), ('Similar', 1), ('boiler', 1), ('technicians', 1), ('lack', 1), ('comprehensive', 1), ('knowledge', 1), ('electrical', 1), ('devices', 1), ('often', 1), ('understand', 1), ('utilize', 1), ('languages', 1), ('libraries', 1), ('struggle', 1), ('grasp', 1), ('intricate', 1), ('workings', 1), ('vulnerabilities', 1), ('lie', 1), ('beneath', 1), ('surface', 1), ('result', 1), ('inadvertently', 1), ('introduce', 1), ('security', 1), ('flaws', 1), ('encounter', 1), ('difficulties', 1), ('troubleshooting', 1), ('complex', 1), ('issues', 1), ('deeper', 1), ('understanding', 1), ('web', 1), ('development', 1), ('best', 1), ('practices', 1), ('could', 1), ('approach', 1), ('endeavors', 1), ('heightened', 1), ('awareness', 1), ('ensuring', 1), ('integrity', 1), ('reliability', 1), ('resources', 1)]\n",
      "test 1 originaltext in situ word total: 119\n",
      "test 1 originaltext in situ: [('boilers', 3), ('knowledge', 3), ('electrical', 3), ('boiler', 2), ('technician', 2), ('complex', 2), ('understanding', 2), ('came', 2), ('worked', 2), ('problem', 2), ('web', 2), ('tools', 2), ('work', 2), ('wasnt', 1), ('really', 1), ('matter', 1), ('intelligence', 1), ('highly', 1), ('trained', 1), ('running', 1), ('used', 1), ('class', 1), ('ship', 1), ('old', 1), ('essentially', 1), ('steam', 1), ('bombs', 1), ('required', 1), ('constant', 1), ('attention', 1), ('ency¬clopedic', 1), ('careful', 1), ('maintenance', 1), ('didnt', 1), ('type', 1), ('critical', 1), ('thinking', 1), ('devices', 1), ('appliances', 1), ('understood', 1), ('something', 1), ('better', 1), ('equipment', 1), ('inherent', 1), ('dan¬gers', 1), ('likely', 1), ('would', 1), ('shut', 1), ('coffee', 1), ('maker', 1), ('qualified', 1), ('person', 1), ('repaired', 1), ('wouldnt', 1), ('gone', 1), ('long', 1), ('perspective', 1), ('often', 1), ('true', 1), ('coding', 1), ('devel¬opment', 1), ('especially', 1), ('libraries', 1), ('Almost', 1), ('everyone', 1), ('library', 1), ('uses', 1), ('digital', 1), ('every', 1), ('day', 1), ('sometimes', 1), ('nearcomplete', 1), ('exclusion', 1), ('print', 1), ('growing', 1), ('number', 1), ('librarians', 1), ('learning', 1), ('development', 1), ('HTML', 1), ('JavaScript', 1), ('Python', 1), ('PHP', 1), ('Even', 1), ('limited', 1), ('amount', 1), ('areas', 1), ('allow', 1), ('important', 1), ('impressive', 1), ('like', 1), ('building', 1), ('websites', 1), ('extending', 1), ('functionality', 1), ('existing', 1), ('without', 1), ('deeper', 1), ('solving', 1), ('problems', 1), ('becomes', 1), ('difficult', 1), ('impossible', 1)]\n",
      "test 1 faketext word total in situ: 168\n",
      "test 1 faketext in situ: [('electrical', 4), ('digital', 4), ('may', 4), ('boiler', 3), ('boilers', 3), ('understanding', 3), ('coding', 3), ('web', 3), ('development', 3), ('librarians', 3), ('technician', 2), ('complex', 2), ('knowledge', 2), ('came', 2), ('devices', 2), ('worked', 2), ('problem', 2), ('often', 2), ('libraries', 2), ('principles', 2), ('potential', 2), ('wasnt', 1), ('really', 1), ('matter', 1), ('intelligence', 1), ('highly', 1), ('trained', 1), ('running', 1), ('used', 1), ('class', 1), ('ship', 1), ('old', 1), ('essentially', 1), ('steam', 1), ('bombs', 1), ('required', 1), ('constant', 1), ('attention', 1), ('encyclopedic', 1), ('careful', 1), ('maintenance', 1), ('didnt', 1), ('type', 1), ('critical', 1), ('thinking', 1), ('appliances', 1), ('understood', 1), ('something', 1), ('better', 1), ('equipment', 1), ('inherent', 1), ('dangers', 1), ('likely', 1), ('would', 1), ('shut', 1), ('coffee', 1), ('maker', 1), ('qualified', 1), ('person', 1), ('repaired', 1), ('wouldnt', 1), ('gone', 1), ('long', 1), ('perspective', 1), ('true', 1), ('especially', 1), ('Almost', 1), ('everyone', 1), ('library', 1), ('uses', 1), ('tools', 1), ('every', 1), ('day', 1), ('sometimes', 1), ('nearcomplete', 1), ('exclusion', 1), ('print', 1), ('growing', 1), ('number', 1), ('learning', 1), ('HTML', 1), ('JavaScript', 1), ('Python', 1), ('PHP', 1), ('possess', 1), ('technical', 1), ('skills', 1), ('manipulate', 1), ('code', 1), ('create', 1), ('functional', 1), ('websites', 1), ('applications', 1), ('comprehension', 1), ('underlying', 1), ('risks', 1), ('involved', 1), ('limited', 1), ('Similar', 1), ('technicians', 1), ('lack', 1), ('comprehensive', 1), ('understand', 1), ('utilize', 1), ('languages', 1), ('struggle', 1), ('grasp', 1), ('intricate', 1), ('workings', 1), ('vulnerabilities', 1), ('lie', 1), ('beneath', 1), ('surface', 1), ('result', 1), ('inadvertently', 1), ('introduce', 1), ('security', 1), ('flaws', 1), ('encounter', 1), ('difficulties', 
1), ('troubleshooting', 1), ('issues', 1), ('deeper', 1), ('best', 1), ('practices', 1), ('could', 1), ('approach', 1), ('endeavors', 1), ('heightened', 1), ('awareness', 1), ('ensuring', 1), ('integrity', 1), ('reliability', 1), ('resources', 1)]\n",
      "test 2 originaltext word total: 53\n",
      "test 2 originaltext: [('got', 2), ('searched', 2), ('library', 2), ('hours', 2), ('example', 2), ('first', 2), ('link', 2), ('would', 2), ('app', 1), ('finds', 1), ('match', 1), ('injects', 1), ('links', 1), ('matches', 1), ('beginning', 1), ('list', 1), ('results', 1), ('discovery', 1), ('tool', 1), ('extremely', 1), ('useful', 1), ('variety', 1), ('terms', 1), ('might', 1), ('want', 1), ('provide', 1), ('alternate', 1), ('additional', 1), ('answers', 1), ('someone', 1), ('confused', 1), ('website', 1), ('page', 1), ('events', 1), ('calendar', 1), ('chinese', 1), ('literature', 1), ('result', 1), ('LibGuide', 1), ('subject', 1), ('looks', 1), ('like', 1), ('see', 1), ('figure', 1), ('22', 1)]\n",
      "test 2 faketext word total: 69\n",
      "test 2 faketext: [('search', 4), ('layer', 4), ('application', 3), ('results', 3), ('discovery', 2), ('data', 2), ('patrons', 2), ('simultaneous', 1), ('allows', 1), ('provide', 1), ('additional', 1), ('relevant', 1), ('alongside', 1), ('regular', 1), ('generated', 1), ('tapping', 1), ('power', 1), ('underlying', 1), ('enhances', 1), ('experience', 1), ('offering', 1), ('comprehensive', 1), ('tailored', 1), ('set', 1), ('However', 1), ('important', 1), ('note', 1), ('act', 1), ('hijacking', 1), ('carried', 1), ('consent', 1), ('collaboration', 1), ('involved', 1), ('departments', 1), ('ensuring', 1), ('aligns', 1), ('librarys', 1), ('goals', 1), ('serves', 1), ('needs', 1), ('innovative', 1), ('approach', 1), ('bridges', 1), ('gap', 1), ('dedicated', 1), ('enriching', 1), ('functionality', 1), ('ultimately', 1), ('providing', 1), ('valuable', 1), ('efficient', 1), ('information', 1), ('retrieval', 1), ('system', 1), ('library', 1), ('users', 1)]\n",
      "test 2 originaltext in situ word total: 81\n",
      "test 2 originaltext in situ: [('library', 3), ('layer', 3), ('application', 2), ('performs', 2), ('discovery', 2), ('data', 2), ('terms', 2), ('got', 2), ('searched', 2), ('hours', 2), ('example', 2), ('first', 2), ('link', 2), ('would', 2), ('request', 1), ('two', 1), ('departments', 1), ('recently', 1), ('built', 1), ('mild', 1), ('act', 1), ('hijacking', 1), ('patron', 1), ('search', 1), ('simultaneously', 1), ('searches', 1), ('question', 1), ('case', 1), ('JSON', 1), ('file', 1), ('app', 1), ('finds', 1), ('match', 1), ('injects', 1), ('links', 1), ('matches', 1), ('beginning', 1), ('list', 1), ('results', 1), ('tool', 1), ('extremely', 1), ('useful', 1), ('variety', 1), ('might', 1), ('want', 1), ('provide', 1), ('alternate', 1), ('additional', 1), ('answers', 1), ('someone', 1), ('confused', 1), ('website', 1), ('page', 1), ('events', 1), ('calendar', 1), ('chinese', 1), ('literature', 1), ('result', 1), ('LibGuide', 1), ('subject', 1), ('looks', 1), ('like', 1), ('see', 1), ('figure', 1), ('22', 1)]\n",
      "test 2 faketext word total in situ: 97\n",
      "test 2 faketext in situ: [('layer', 7), ('application', 5), ('search', 5), ('data', 4), ('discovery', 3), ('results', 3), ('departments', 2), ('library', 2), ('performs', 2), ('act', 2), ('hijacking', 2), ('patrons', 2), ('request', 1), ('two', 1), ('recently', 1), ('built', 1), ('mild', 1), ('patron', 1), ('simultaneously', 1), ('searches', 1), ('terms', 1), ('question', 1), ('case', 1), ('JSON', 1), ('file', 1), ('simultaneous', 1), ('allows', 1), ('provide', 1), ('additional', 1), ('relevant', 1), ('alongside', 1), ('regular', 1), ('generated', 1), ('tapping', 1), ('power', 1), ('underlying', 1), ('enhances', 1), ('experience', 1), ('offering', 1), ('comprehensive', 1), ('tailored', 1), ('set', 1), ('However', 1), ('important', 1), ('note', 1), ('carried', 1), ('consent', 1), ('collaboration', 1), ('involved', 1), ('ensuring', 1), ('aligns', 1), ('librarys', 1), ('goals', 1), ('serves', 1), ('needs', 1), ('innovative', 1), ('approach', 1), ('bridges', 1), ('gap', 1), ('dedicated', 1), ('enriching', 1), ('functionality', 1), ('ultimately', 1), ('providing', 1), ('valuable', 1), ('efficient', 1), ('information', 1), ('retrieval', 1), ('system', 1), ('users', 1)]\n",
      "test 3 originaltext word total: 15\n",
      "test 3 originaltext: [('important', 2), ('Thats', 1), ('leave', 1), ('plenty', 1), ('elucidating', 1), ('comments', 1), ('code', 1), ('also', 1), ('build', 1), ('straightforward', 1), ('ways', 1), ('others', 1), ('reasonably', 1), ('follow', 1)]\n",
      "test 3 faketext word total: 71\n",
      "test 3 faketext: [('pages', 3), ('applications', 3), ('web', 2), ('functionality', 2), ('potential', 2), ('multipage', 2), ('page', 2), ('multiple', 1), ('application', 1), ('allows', 1), ('better', 1), ('organization', 1), ('structure', 1), ('particularly', 1), ('larger', 1), ('complex', 1), ('dividing', 1), ('separate', 1), ('becomes', 1), ('easier', 1), ('manage', 1), ('maintain', 1), ('codebase', 1), ('original', 1), ('developer', 1), ('future', 1), ('contributors', 1), ('serves', 1), ('specific', 1), ('purpose', 1), ('task', 1), ('leading', 1), ('users', 1), ('series', 1), ('interconnected', 1), ('complete', 1), ('desired', 1), ('actions', 1), ('approach', 1), ('avoids', 1), ('pitfalls', 1), ('overwhelming', 1), ('single', 1), ('excessive', 1), ('code', 1), ('content', 1), ('could', 1), ('make', 1), ('difficult', 1), ('understand', 1), ('modify', 1), ('adopting', 1), ('architecture', 1), ('developers', 1), ('strike', 1), ('balance', 1), ('maintainability', 1), ('ensuring', 1), ('remains', 1), ('manageable', 1), ('comprehensible', 1), ('time', 1)]\n",
      "test 3 originaltext in situ word total: 102\n",
      "test 3 originaltext in situ: [('page', 5), ('web', 4), ('applications', 3), ('single', 2), ('require', 2), ('work', 2), ('done', 2), ('follow', 2), ('build', 2), ('important', 2), ('Fundamentally', 1), ('exactly', 1), ('sound', 1), ('like', 1), ('Singlepage', 1), ('designed', 1), ('around', 1), ('generally', 1), ('built', 1), ('dont', 1), ('reloads', 1), ('either', 1), ('client', 1), ('JavaScript', 1), ('access', 1), ('sources', 1), ('data', 1), ('serverside', 1), ('scripts', 1), ('need', 1), ('kind', 1), ('asynchronous', 1), ('call', 1), ('doesnt', 1), ('another', 1), ('load', 1), ('Asynchronous', 1), ('JAvascript', 1), ('XML', 1), ('AJAX', 1), ('newer', 1), ('somewhat', 1), ('cleaner', 1), ('clientside', 1), ('technology', 1), ('Promises', 1), ('new', 1), ('ECMA', 1), ('6', 1), ('standard', 1), ('Well', 1), ('talk', 1), ('later', 1), ('book', 1), ('Multipage', 1), ('send', 1), ('users', 1), ('one', 1), ('Often', 1), ('larger', 1), ('dynamically', 1), ('rewriting', 1), ('everything', 1), ('within', 1), ('becomes', 1), ('cumbersome', 1), ('hard', 1), ('developers', 1), ('Remember', 1), ('whatever', 1), ('must', 1), ('maintained', 1), ('possibly', 1), ('someone', 1), ('else', 1), ('Thats', 1), ('leave', 1), ('plenty', 1), ('elucidating', 1), ('comments', 1), ('code', 1), ('also', 1), ('straightforward', 1), ('ways', 1), ('others', 1), ('reasonably', 1)]\n",
      "test 3 faketext word total in situ: 158\n",
      "test 3 faketext in situ: [('page', 7), ('web', 6), ('applications', 6), ('single', 3), ('pages', 3), ('require', 2), ('work', 2), ('done', 2), ('users', 2), ('larger', 2), ('becomes', 2), ('developers', 2), ('functionality', 2), ('potential', 2), ('multipage', 2), ('Fundamentally', 1), ('exactly', 1), ('sound', 1), ('like', 1), ('Singlepage', 1), ('designed', 1), ('around', 1), ('generally', 1), ('built', 1), ('dont', 1), ('reloads', 1), ('either', 1), ('client', 1), ('JavaScript', 1), ('access', 1), ('sources', 1), ('data', 1), ('serverside', 1), ('scripts', 1), ('need', 1), ('kind', 1), ('asynchronous', 1), ('call', 1), ('doesnt', 1), ('another', 1), ('load', 1), ('Asynchronous', 1), ('JAvascript', 1), ('XML', 1), ('AJAX', 1), ('newer', 1), ('somewhat', 1), ('cleaner', 1), ('clientside', 1), ('technology', 1), ('Promises', 1), ('new', 1), ('ECMA', 1), ('6', 1), ('standard', 1), ('Well', 1), ('talk', 1), ('later', 1), ('book', 1), ('Multipage', 1), ('send', 1), ('one', 1), ('Often', 1), ('dynamically', 1), ('rewriting', 1), ('everything', 1), ('within', 1), ('cumbersome', 1), ('hard', 1), ('follow', 1), ('Remember', 1), ('whatever', 1), ('build', 1), ('must', 1), ('maintained', 1), ('possibly', 1), ('someone', 1), ('else', 1), ('multiple', 1), ('application', 1), ('allows', 1), ('better', 1), ('organization', 1), ('structure', 1), ('particularly', 1), ('complex', 1), ('dividing', 1), ('separate', 1), ('easier', 1), ('manage', 1), ('maintain', 1), ('codebase', 1), ('original', 1), ('developer', 1), ('future', 1), ('contributors', 1), ('serves', 1), ('specific', 1), ('purpose', 1), ('task', 1), ('leading', 1), ('series', 1), ('interconnected', 1), ('complete', 1), ('desired', 1), ('actions', 1), ('approach', 1), ('avoids', 1), ('pitfalls', 1), ('overwhelming', 1), ('excessive', 1), ('code', 1), ('content', 1), ('could', 1), ('make', 1), ('difficult', 1), ('understand', 1), ('modify', 1), ('adopting', 1), ('architecture', 1), ('strike', 1), ('balance', 1), 
('maintainability', 1), ('ensuring', 1), ('remains', 1), ('manageable', 1), ('comprehensible', 1), ('time', 1)]\n",
      "test 4 originaltext word total: 20\n",
      "test 4 originaltext: [('temporary', 1), ('safe', 1), ('space', 1), ('needed', 1), ('start', 1), ('thinking', 1), ('either', 1), ('contacting', 1), ('outside', 1), ('world', 1), ('likely', 1), ('getting', 1), ('hell', 1), ('island', 1), ('divided', 1), ('group', 1), ('simply', 1), ('made', 1), ('problem', 1), ('difficult', 1)]\n",
      "test 4 faketext word total: 144\n",
      "test 4 faketext: [('Nelsons', 3), ('Lab', 3), ('Nelson', 3), ('mind', 2), ('circumstances', 2), ('within', 2), ('clear', 2), ('knew', 2), ('needed', 2), ('raced', 1), ('considered', 1), ('predicament', 1), ('layout', 1), ('individual', 1), ('workspaces', 1), ('seemed', 1), ('advantageous', 1), ('normal', 1), ('allowing', 1), ('person', 1), ('focus', 1), ('tasks', 1), ('without', 1), ('encroaching', 1), ('one', 1), ('another', 1), ('However', 1), ('trapped', 1), ('confines', 1), ('oncedesirable', 1), ('spaciousness', 1), ('felt', 1), ('like', 1), ('hindrance', 1), ('isolation', 1), ('became', 1), ('apparent', 1), ('eerie', 1), ('silence', 1), ('settled', 1), ('broken', 1), ('sound', 1), ('breathing', 1), ('Suriyamas', 1), ('horrified', 1), ('expression', 1), ('mirrored', 1), ('growing', 1), ('unease', 1), ('Robards', 1), ('stood', 1), ('edge', 1), ('ready', 1), ('spring', 1), ('action', 1), ('slightest', 1), ('provocation', 1), ('Rubbing', 1), ('temples', 1), ('thoughts', 1), ('assess', 1), ('situation', 1), ('realistically', 1), ('effectively', 1), ('cut', 1), ('rest', 1), ('base', 1), ('colleagues', 1), ('confined', 1), ('hyperbaric', 1), ('chamber', 1), ('C', 1), ('dire', 1), ('circumstance', 1), ('relying', 1), ('static', 1), ('defensive', 1), ('stance', 1), ('risky', 1), ('proposition', 1), ('combat', 1), ('training', 1), ('echoed', 1), ('reminding', 1), ('holding', 1), ('ground', 1), ('would', 1), ('rarely', 1), ('lead', 1), ('favorable', 1), ('outcome', 1), ('consider', 1), ('alternative', 1), ('strategies', 1), ('understanding', 1), ('rescue', 1), ('reinforcements', 1), ('might', 1), ('readily', 1), ('available', 1), ('Time', 1), ('essence', 1), ('took', 1), ('deep', 1), ('breath', 1), ('steeling', 1), ('formulate', 1), ('plan', 1), ('think', 1), ('creatively', 1), ('leveraging', 1), ('resources', 1), ('collective', 1), ('skills', 1), ('increase', 1), ('chances', 1), ('survival', 1), ('simply', 1), ('waiting', 1), ('help', 1), ('arrive', 1), 
('viable', 1), ('option', 1), ('stay', 1), ('calm', 1), ('adapt', 1), ('find', 1), ('way', 1), ('navigate', 1), ('grim', 1), ('reality', 1), ('faced', 1)]\n",
      "test 4 originaltext in situ word total: 143\n",
      "test 4 originaltext in situ: [('Lab', 6), ('door', 3), ('worked', 2), ('C', 2), ('hyperbaric', 2), ('chamber', 2), ('space', 2), ('divided', 2), ('way', 2), ('people', 2), ('Nelson', 2), ('face', 2), ('world', 2), ('start', 2), ('locked', 2), ('complex', 1), ('would', 1), ('looked', 1), ('small', 1), ('major', 1), ('University', 1), ('sprawling', 1), ('Holiday', 1), ('Island', 1), ('labs', 1), ('went', 1), ('relatively', 1), ('neat', 1), ('order', 1), ('Unlike', 1), ('compact', 1), ('B', 1), ('Jeffers', 1), ('mostly', 1), ('alone', 1), ('away', 1), ('designated', 1), ('lab', 1), ('large', 1), ('dominating', 1), ('main', 1), ('gave', 1), ('room', 1), ('work', 1), ('individually', 1), ('Chang', 1), ('secured', 1), ('caught', 1), ('breath', 1), ('noises', 1), ('already', 1), ('begun', 1), ('diminish', 1), ('breathing', 1), ('quite', 1), ('quiet', 1), ('Suriyama', 1), ('taken', 1), ('several', 1), ('steps', 1), ('back', 1), ('staring', 1), ('mask', 1), ('horror', 1), ('Robards', 1), ('side', 1), ('doorway', 1), ('body', 1), ('thrumming', 1), ('wire', 1), ('tension', 1), ('looking', 1), ('like', 1), ('ready', 1), ('pounce', 1), ('anything', 1), ('forced', 1), ('stepped', 1), ('backward', 1), ('rubbed', 1), ('palms', 1), ('tried', 1), ('calm', 1), ('mind', 1), ('assessing', 1), ('situation', 1), ('grim', 1), ('base', 1), ('One', 1), ('first', 1), ('things', 1), ('taught', 1), ('combat', 1), ('training', 1), ('rely', 1), ('fixed', 1), ('defensive', 1), ('positions', 1), ('Hold', 1), ('ground', 1), ('strategies', 1), ('rarely', 1), ('times', 1), ('could', 1), ('depend', 1), ('reinforcement', 1), ('rescue', 1), ('temporary', 1), ('safe', 1), ('needed', 1), ('thinking', 1), ('either', 1), ('contacting', 1), ('outside', 1), ('likely', 1), ('getting', 1), ('hell', 1), ('island', 1), ('group', 1), ('simply', 1), ('made', 1), ('problem', 1), ('difficult', 1)]\n",
      "test 4 faketext word total in situ: 267\n",
      "test 4 faketext in situ: [('Lab', 9), ('Nelson', 5), ('C', 3), ('hyperbaric', 3), ('chamber', 3), ('way', 3), ('door', 3), ('mind', 3), ('Nelsons', 3), ('would', 2), ('worked', 2), ('people', 2), ('breath', 2), ('breathing', 2), ('face', 2), ('Robards', 2), ('like', 2), ('ready', 2), ('calm', 2), ('situation', 2), ('grim', 2), ('locked', 2), ('base', 2), ('combat', 2), ('training', 2), ('defensive', 2), ('ground', 2), ('strategies', 2), ('rarely', 2), ('rescue', 2), ('circumstances', 2), ('within', 2), ('clear', 2), ('knew', 2), ('needed', 2), ('complex', 1), ('looked', 1), ('small', 1), ('major', 1), ('University', 1), ('sprawling', 1), ('Holiday', 1), ('Island', 1), ('labs', 1), ('went', 1), ('relatively', 1), ('neat', 1), ('order', 1), ('Unlike', 1), ('compact', 1), ('B', 1), ('Jeffers', 1), ('mostly', 1), ('alone', 1), ('away', 1), ('designated', 1), ('lab', 1), ('large', 1), ('dominating', 1), ('main', 1), ('space', 1), ('divided', 1), ('gave', 1), ('room', 1), ('work', 1), ('individually', 1), ('Chang', 1), ('secured', 1), ('caught', 1), ('noises', 1), ('already', 1), ('begun', 1), ('diminish', 1), ('quite', 1), ('quiet', 1), ('Suriyama', 1), ('taken', 1), ('several', 1), ('steps', 1), ('back', 1), ('staring', 1), ('mask', 1), ('horror', 1), ('side', 1), ('doorway', 1), ('body', 1), ('thrumming', 1), ('wire', 1), ('tension', 1), ('looking', 1), ('world', 1), ('pounce', 1), ('anything', 1), ('forced', 1), ('stepped', 1), ('backward', 1), ('rubbed', 1), ('palms', 1), ('tried', 1), ('start', 1), ('assessing', 1), ('One', 1), ('first', 1), ('things', 1), ('taught', 1), ('rely', 1), ('fixed', 1), ('positions', 1), ('Hold', 1), ('times', 1), ('could', 1), ('depend', 1), ('reinforcement', 1), ('raced', 1), ('considered', 1), ('predicament', 1), ('layout', 1), ('individual', 1), ('workspaces', 1), ('seemed', 1), ('advantageous', 1), ('normal', 1), ('allowing', 1), ('person', 1), ('focus', 1), ('tasks', 1), ('without', 1), ('encroaching', 1), ('one', 1), 
('another', 1), ('However', 1), ('trapped', 1), ('confines', 1), ('oncedesirable', 1), ('spaciousness', 1), ('felt', 1), ('hindrance', 1), ('isolation', 1), ('became', 1), ('apparent', 1), ('eerie', 1), ('silence', 1), ('settled', 1), ('broken', 1), ('sound', 1), ('Suriyamas', 1), ('horrified', 1), ('expression', 1), ('mirrored', 1), ('growing', 1), ('unease', 1), ('stood', 1), ('edge', 1), ('spring', 1), ('action', 1), ('slightest', 1), ('provocation', 1), ('Rubbing', 1), ('temples', 1), ('thoughts', 1), ('assess', 1), ('realistically', 1), ('effectively', 1), ('cut', 1), ('rest', 1), ('colleagues', 1), ('confined', 1), ('dire', 1), ('circumstance', 1), ('relying', 1), ('static', 1), ('stance', 1), ('risky', 1), ('proposition', 1), ('echoed', 1), ('reminding', 1), ('holding', 1), ('lead', 1), ('favorable', 1), ('outcome', 1), ('consider', 1), ('alternative', 1), ('understanding', 1), ('reinforcements', 1), ('might', 1), ('readily', 1), ('available', 1), ('Time', 1), ('essence', 1), ('took', 1), ('deep', 1), ('steeling', 1), ('formulate', 1), ('plan', 1), ('think', 1), ('creatively', 1), ('leveraging', 1), ('resources', 1), ('collective', 1), ('skills', 1), ('increase', 1), ('chances', 1), ('survival', 1), ('simply', 1), ('waiting', 1), ('help', 1), ('arrive', 1), ('viable', 1), ('option', 1), ('stay', 1), ('adapt', 1), ('find', 1), ('navigate', 1), ('reality', 1), ('faced', 1)]\n",
      "test 5 originaltext word total: 35\n",
      "test 5 originaltext: [('door', 2), ('inside', 2), ('side', 2), ('little', 1), ('five', 1), ('seconds', 1), ('later', 1), ('open', 1), ('ushering', 1), ('last', 1), ('cross', 1), ('ground', 1), ('antenna', 1), ('Robards', 1), ('scrawny', 1), ('legs', 1), ('pumping', 1), ('desperately', 1), ('head', 1), ('swiveled', 1), ('almost', 1), ('comically', 1), ('Nelson', 1), ('eased', 1), ('shut', 1), ('realized', 1), ('still', 1), ('holding', 1), ('padlock', 1), ('reason', 1), ('dropped', 1), ('chagrin', 1)]\n",
      "test 5 faketext word total: 139\n",
      "test 5 faketext: [('Nelson', 3), ('lock', 3), ('key', 3), ('trembling', 2), ('hand', 2), ('urgency', 2), ('padlock', 2), ('Nelsons', 2), ('shed', 2), ('tools', 2), ('fumbled', 1), ('threatened', 1), ('compromise', 1), ('dexterity', 1), ('situation', 1), ('intensified', 1), ('sound', 1), ('comrades', 1), ('scrambling', 1), ('antenna', 1), ('echoed', 1), ('distance', 1), ('Aware', 1), ('imminent', 1), ('arrival', 1), ('fought', 1), ('regain', 1), ('control', 1), ('nerves', 1), ('sharp', 1), ('exhale', 1), ('steadied', 1), ('grip', 1), ('small', 1), ('plain', 1), ('circle', 1), ('master', 1), ('weight', 1), ('offering', 1), ('faint', 1), ('reassurance', 1), ('metal', 1), ('slid', 1), ('slight', 1), ('resistance', 1), ('heart', 1), ('pounded', 1), ('chest', 1), ('Every', 1), ('second', 1), ('counted', 1), ('focused', 1), ('gaze', 1), ('keyhole', 1), ('willing', 1), ('manipulate', 1), ('swiftly', 1), ('efficiently', 1), ('turned', 1), ('mechanism', 1), ('clicked', 1), ('surge', 1), ('relief', 1), ('removed', 1), ('door', 1), ('creaked', 1), ('open', 1), ('revealing', 1), ('dimly', 1), ('lit', 1), ('interior', 1), ('filled', 1), ('equipment', 1), ('Knowing', 1), ('pursuers', 1), ('would', 1), ('soon', 1), ('upon', 1), ('hurriedly', 1), ('stepped', 1), ('inside', 1), ('senses', 1), ('adjusting', 1), ('musty', 1), ('scent', 1), ('cool', 1), ('air', 1), ('enveloped', 1), ('others', 1), ('joined', 1), ('breaths', 1), ('ragged', 1), ('exertion', 1), ('escape', 1), ('Time', 1), ('short', 1), ('needed', 1), ('find', 1), ('way', 1), ('fortify', 1), ('position', 1), ('devise', 1), ('exit', 1), ('strategy', 1), ('Surveying', 1), ('contents', 1), ('mind', 1), ('raced', 1), ('seeking', 1), ('advantage', 1), ('amidst', 1), ('supplies', 1), ('pushed', 1), ('forward', 1), ('spurring', 1), ('overcome', 1), ('lingering', 1), ('unease', 1), ('time', 1), ('regroup', 1), ('gather', 1), ('wits', 1), ('prepare', 1), ('challenges', 1), ('lay', 1), ('ahead', 1)]\n",
      "test 5 originaltext in situ word total: 85\n",
      "test 5 originaltext in situ: [('key', 3), ('shed', 2), ('hand', 2), ('padlock', 2), ('antenna', 2), ('door', 2), ('inside', 2), ('side', 2), ('feet', 1), ('patch', 1), ('grass', 1), ('sprinted', 1), ('hoped', 1), ('reasonably', 1), ('stealthy', 1), ('fashion', 1), ('large', 1), ('white', 1), ('constructed', 1), ('sheet', 1), ('metal', 1), ('cheap', 1), ('corrugated', 1), ('roof', 1), ('moment', 1), ('already', 1), ('recovering', 1), ('ring', 1), ('pocket', 1), ('feeling', 1), ('small', 1), ('plain', 1), ('circle', 1), ('signified', 1), ('master', 1), ('produced', 1), ('nestled', 1), ('uncomfortably', 1), ('thumb', 1), ('forefinger', 1), ('found', 1), ('shaking', 1), ('Behind', 1), ('heard', 1), ('others', 1), ('scrambling', 1), ('join', 1), ('fumbled', 1), ('lock', 1), ('little', 1), ('five', 1), ('seconds', 1), ('later', 1), ('open', 1), ('ushering', 1), ('last', 1), ('cross', 1), ('ground', 1), ('Robards', 1), ('scrawny', 1), ('legs', 1), ('pumping', 1), ('desperately', 1), ('head', 1), ('swiveled', 1), ('almost', 1), ('comically', 1), ('Nelson', 1), ('eased', 1), ('shut', 1), ('realized', 1), ('still', 1), ('holding', 1), ('reason', 1), ('dropped', 1), ('chagrin', 1)]\n",
      "test 5 faketext word total in situ: 189\n",
      "test 5 faketext in situ: [('key', 6), ('shed', 4), ('hand', 4), ('lock', 4), ('padlock', 3), ('Nelson', 3), ('metal', 2), ('small', 2), ('plain', 2), ('circle', 2), ('master', 2), ('others', 2), ('scrambling', 2), ('antenna', 2), ('fumbled', 2), ('trembling', 2), ('urgency', 2), ('Nelsons', 2), ('tools', 2), ('feet', 1), ('patch', 1), ('grass', 1), ('sprinted', 1), ('hoped', 1), ('reasonably', 1), ('stealthy', 1), ('fashion', 1), ('large', 1), ('white', 1), ('constructed', 1), ('sheet', 1), ('cheap', 1), ('corrugated', 1), ('roof', 1), ('moment', 1), ('already', 1), ('recovering', 1), ('ring', 1), ('pocket', 1), ('feeling', 1), ('signified', 1), ('produced', 1), ('nestled', 1), ('uncomfortably', 1), ('thumb', 1), ('forefinger', 1), ('found', 1), ('shaking', 1), ('Behind', 1), ('heard', 1), ('join', 1), ('threatened', 1), ('compromise', 1), ('dexterity', 1), ('situation', 1), ('intensified', 1), ('sound', 1), ('comrades', 1), ('echoed', 1), ('distance', 1), ('Aware', 1), ('imminent', 1), ('arrival', 1), ('fought', 1), ('regain', 1), ('control', 1), ('nerves', 1), ('sharp', 1), ('exhale', 1), ('steadied', 1), ('grip', 1), ('weight', 1), ('offering', 1), ('faint', 1), ('reassurance', 1), ('slid', 1), ('slight', 1), ('resistance', 1), ('heart', 1), ('pounded', 1), ('chest', 1), ('Every', 1), ('second', 1), ('counted', 1), ('focused', 1), ('gaze', 1), ('keyhole', 1), ('willing', 1), ('manipulate', 1), ('swiftly', 1), ('efficiently', 1), ('turned', 1), ('mechanism', 1), ('clicked', 1), ('surge', 1), ('relief', 1), ('removed', 1), ('door', 1), ('creaked', 1), ('open', 1), ('revealing', 1), ('dimly', 1), ('lit', 1), ('interior', 1), ('filled', 1), ('equipment', 1), ('Knowing', 1), ('pursuers', 1), ('would', 1), ('soon', 1), ('upon', 1), ('hurriedly', 1), ('stepped', 1), ('inside', 1), ('senses', 1), ('adjusting', 1), ('musty', 1), ('scent', 1), ('cool', 1), ('air', 1), ('enveloped', 1), ('joined', 1), ('breaths', 1), ('ragged', 1), ('exertion', 1), ('escape', 1), 
('Time', 1), ('short', 1), ('needed', 1), ('find', 1), ('way', 1), ('fortify', 1), ('position', 1), ('devise', 1), ('exit', 1), ('strategy', 1), ('Surveying', 1), ('contents', 1), ('mind', 1), ('raced', 1), ('seeking', 1), ('advantage', 1), ('amidst', 1), ('supplies', 1), ('pushed', 1), ('forward', 1), ('spurring', 1), ('overcome', 1), ('lingering', 1), ('unease', 1), ('time', 1), ('regroup', 1), ('gather', 1), ('wits', 1), ('prepare', 1), ('challenges', 1), ('lay', 1), ('ahead', 1)]\n",
      "test 6 originaltext word total: 33\n",
      "test 6 originaltext: [('Montoya', 1), ('set', 1), ('aside', 1), ('bag', 1), ('tools', 1), ('Nederman', 1), ('Nelson', 1), ('seized', 1), ('handles', 1), ('front', 1), ('pushed', 1), ('Whittaker', 1), ('stood', 1), ('grim', 1), ('lookout', 1), ('Without', 1), ('hydraulics', 1), ('moving', 1), ('fro', 1), ('door', 1), ('heavy', 1), ('hells', 1), ('half', 1), ('acre', 1), ('soon', 1), ('showed', 1), ('grudging', 1), ('movement', 1), ('SEAL', 1), ('inside', 1), ('realized', 1), ('happening', 1), ('pitched', 1)]\n",
      "test 6 faketext word total: 180\n",
      "test 6 faketext: [('door', 7), ('metal', 4), ('pit', 3), ('Nelson', 2), ('air', 2), ('doors', 2), ('glimmer', 2), ('hope', 2), ('evident', 2), ('opening', 2), ('strength', 2), ('tension', 2), ('determination', 2), ('escape', 2), ('confines', 2), ('narrow', 2), ('observed', 1), ('massive', 1), ('couldnt', 1), ('help', 1), ('feel', 1), ('sense', 1), ('awe', 1), ('mingled', 1), ('trepidation', 1), ('formidable', 1), ('structure', 1), ('reminiscent', 1), ('prop', 1), ('postapocalyptic', 1), ('film', 1), ('exuded', 1), ('impenetrability', 1), ('solid', 1), ('construction', 1), ('composed', 1), ('heavy', 1), ('panels', 1), ('reinforced', 1), ('rivets', 1), ('indicated', 1), ('designed', 1), ('withstand', 1), ('significant', 1), ('force', 1), ('resist', 1), ('intrusion', 1), ('dim', 1), ('lighting', 1), ('cast', 1), ('eerie', 1), ('shadows', 1), ('surface', 1), ('accentuating', 1), ('ominous', 1), ('appearance', 1), ('Driven', 1), ('mixture', 1), ('curiosity', 1), ('approached', 1), ('eyes', 1), ('scanning', 1), ('rugged', 1), ('exterior', 1), ('noticed', 1), ('rail', 1), ('along', 1), ('moved', 1), ('emphasizing', 1), ('substantial', 1), ('weight', 1), ('became', 1), ('would', 1), ('require', 1), ('considerable', 1), ('effort', 1), ('adding', 1), ('mounting', 1), ('shared', 1), ('group', 1), ('mustered', 1), ('collective', 1), ('muscles', 1), ('straining', 1), ('resistance', 1), ('sound', 1), ('grunts', 1), ('strained', 1), ('breaths', 1), ('filled', 1), ('space', 1), ('exerted', 1), ('united', 1), ('goal', 1), ('Inch', 1), ('inch', 1), ('budged', 1), ('relentless', 1), ('pushing', 1), ('scraping', 1), ('felt', 1), ('like', 1), ('eternity', 1), ('yielded', 1), ('yielding', 1), ('nineteen', 1), ('inches', 1), ('sight', 1), ('even', 1), ('sliver', 1), ('freedom', 1), ('brought', 1), ('renewed', 1), ('vigor', 1), ('tired', 1), ('bodies', 1), ('spark', 1), ('weary', 1), ('souls', 1), ('Doc', 1), ('first', 1), ('emerge', 1), ('gap', 1), ('pushed', 1), ('way', 1), 
('every', 1), ('movement', 1), ('others', 1), ('followed', 1), ('suit', 1), ('relief', 1), ('washed', 1), ('mingling', 1), ('lingering', 1), ('stood', 1), ('partially', 1), ('ajar', 1), ('serving', 1), ('symbol', 1), ('perseverance', 1), ('resilience', 1), ('face', 1), ('adversity', 1), ('underway', 1), ('optimism', 1), ('ignited', 1), ('within', 1), ('propelling', 1), ('forward', 1), ('unknown', 1), ('ready', 1), ('confront', 1), ('whatever', 1), ('challenges', 1), ('awaited', 1), ('beyond', 1)]\n",
      "test 6 originaltext in situ word total: 60\n",
      "test 6 originaltext in situ: [('door', 3), ('pushed', 2), ('pit', 1), ('moved', 1), ('along', 1), ('metal', 1), ('rail', 1), ('looked', 1), ('little', 1), ('like', 1), ('something', 1), ('would', 1), ('used', 1), ('secure', 1), ('nuclear', 1), ('bunker', 1), ('bad', 1), ('movie', 1), ('Montoya', 1), ('set', 1), ('aside', 1), ('bag', 1), ('tools', 1), ('Nederman', 1), ('Nelson', 1), ('seized', 1), ('handles', 1), ('front', 1), ('Whittaker', 1), ('stood', 1), ('grim', 1), ('lookout', 1), ('Without', 1), ('hydraulics', 1), ('moving', 1), ('fro', 1), ('heavy', 1), ('hells', 1), ('half', 1), ('acre', 1), ('soon', 1), ('showed', 1), ('grudging', 1), ('movement', 1), ('SEAL', 1), ('inside', 1), ('realized', 1), ('happening', 1), ('pitched', 1), ('minutes', 1), ('grunting', 1), ('effort', 1), ('opened', 1), ('nineteen', 1), ('inches', 1), ('Doc', 1), ('way', 1)]\n",
      "test 6 faketext word total in situ: 207\n",
      "test 6 faketext in situ: [('door', 9), ('metal', 5), ('pit', 3), ('moved', 2), ('along', 2), ('rail', 2), ('like', 2), ('would', 2), ('Nelson', 2), ('air', 2), ('doors', 2), ('glimmer', 2), ('hope', 2), ('evident', 2), ('opening', 2), ('strength', 2), ('effort', 2), ('tension', 2), ('determination', 2), ('escape', 2), ('confines', 2), ('narrow', 2), ('nineteen', 2), ('inches', 2), ('Doc', 2), ('pushed', 2), ('way', 2), ('looked', 1), ('little', 1), ('something', 1), ('used', 1), ('secure', 1), ('nuclear', 1), ('bunker', 1), ('bad', 1), ('movie', 1), ('observed', 1), ('massive', 1), ('couldnt', 1), ('help', 1), ('feel', 1), ('sense', 1), ('awe', 1), ('mingled', 1), ('trepidation', 1), ('formidable', 1), ('structure', 1), ('reminiscent', 1), ('prop', 1), ('postapocalyptic', 1), ('film', 1), ('exuded', 1), ('impenetrability', 1), ('solid', 1), ('construction', 1), ('composed', 1), ('heavy', 1), ('panels', 1), ('reinforced', 1), ('rivets', 1), ('indicated', 1), ('designed', 1), ('withstand', 1), ('significant', 1), ('force', 1), ('resist', 1), ('intrusion', 1), ('dim', 1), ('lighting', 1), ('cast', 1), ('eerie', 1), ('shadows', 1), ('surface', 1), ('accentuating', 1), ('ominous', 1), ('appearance', 1), ('Driven', 1), ('mixture', 1), ('curiosity', 1), ('approached', 1), ('eyes', 1), ('scanning', 1), ('rugged', 1), ('exterior', 1), ('noticed', 1), ('emphasizing', 1), ('substantial', 1), ('weight', 1), ('became', 1), ('require', 1), ('considerable', 1), ('adding', 1), ('mounting', 1), ('shared', 1), ('group', 1), ('mustered', 1), ('collective', 1), ('muscles', 1), ('straining', 1), ('resistance', 1), ('sound', 1), ('grunts', 1), ('strained', 1), ('breaths', 1), ('filled', 1), ('space', 1), ('exerted', 1), ('united', 1), ('goal', 1), ('Inch', 1), ('inch', 1), ('budged', 1), ('relentless', 1), ('pushing', 1), ('scraping', 1), ('felt', 1), ('eternity', 1), ('yielded', 1), ('yielding', 1), ('sight', 1), ('even', 1), ('sliver', 1), ('freedom', 1), ('brought', 1), 
('renewed', 1), ('vigor', 1), ('tired', 1), ('bodies', 1), ('spark', 1), ('weary', 1), ('souls', 1), ('first', 1), ('emerge', 1), ('gap', 1), ('every', 1), ('movement', 1), ('others', 1), ('followed', 1), ('suit', 1), ('relief', 1), ('washed', 1), ('mingling', 1), ('lingering', 1), ('stood', 1), ('partially', 1), ('ajar', 1), ('serving', 1), ('symbol', 1), ('perseverance', 1), ('resilience', 1), ('face', 1), ('adversity', 1), ('underway', 1), ('optimism', 1), ('ignited', 1), ('within', 1), ('propelling', 1), ('forward', 1), ('unknown', 1), ('ready', 1), ('confront', 1), ('whatever', 1), ('challenges', 1), ('awaited', 1), ('beyond', 1), ('pitAfter', 1), ('minutes', 1), ('grunting', 1), ('opened', 1)]\n",
      "test 7 originaltext word total: 39\n",
      "test 7 originaltext: [('address', 3), ('variable', 3), ('see', 1), ('easy', 1), ('practice', 1), ('First', 1), ('web', 1), ('added', 1), ('question', 1), ('mark', 1), ('indicates', 1), ('browser', 1), ('complete', 1), ('anything', 1), ('beyond', 1), ('contain', 1), ('additional', 1), ('location', 1), ('information', 1), ('variables', 1), ('simple', 1), ('instantiate', 1), ('Theyre', 1), ('simply', 1), ('name', 1), ('equal', 1), ('sign', 1), ('followed', 1), ('value', 1), ('contains', 1), ('need', 1), ('add', 1), ('another', 1), ('separate', 1), ('ampersands', 1)]\n",
      "test 7 faketext word total: 126\n",
      "test 7 faketext: [('variables', 4), ('data', 4), ('request', 3), ('URL', 3), ('information', 3), ('browser', 3), ('HTTP', 2), ('additional', 2), ('GET', 2), ('developers', 2), ('specific', 2), ('subsequent', 2), ('pages', 2), ('valuable', 2), ('user', 2), ('appended', 2), ('JavaScript', 2), ('code', 2), ('seamless', 2), ('provide', 1), ('means', 1), ('transmit', 1), ('alongside', 1), ('appending', 1), ('keyvalue', 1), ('pairs', 1), ('include', 1), ('accessed', 1), ('utilized', 1), ('scripts', 1), ('feature', 1), ('proves', 1), ('especially', 1), ('comes', 1), ('passing', 1), ('inputs', 1), ('session', 1), ('identifiers', 1), ('relevant', 1), ('required', 1), ('processing', 1), ('personalization', 1), ('crafting', 1), ('extra', 1), ('seamlessly', 1), ('incorporates', 1), ('forming', 1), ('complete', 1), ('query', 1), ('reaches', 1), ('server', 1), ('interprets', 1), ('performs', 1), ('necessary', 1), ('actions', 1), ('accordingly', 1), ('However', 1), ('may', 1), ('seem', 1), ('like', 1), ('hidden', 1), ('treasure', 1), ('trove', 1), ('average', 1), ('remains', 1), ('inconspicuous', 1), ('page', 1), ('cleverly', 1), ('designed', 1), ('lies', 1), ('wait', 1), ('ready', 1), ('extract', 1), ('harvest', 1), ('concealed', 1), ('oblivious', 1), ('elements', 1), ('carries', 1), ('standard', 1), ('rendering', 1), ('execution', 1), ('processes', 1), ('carefully', 1), ('crafted', 1), ('though', 1), ('detects', 1), ('captures', 1), ('accessing', 1), ('utilizing', 1), ('contain', 1), ('process', 1), ('allows', 1), ('flow', 1), ('empowering', 1), ('create', 1), ('dynamic', 1), ('personalized', 1), ('web', 1), ('experiences', 1)]\n",
      "test 7 originaltext in situ word total: 59\n",
      "test 7 originaltext in situ: [('address', 3), ('variable', 3), ('HTTP', 2), ('contain', 2), ('information', 2), ('variables', 2), ('browser', 2), ('simply', 2), ('protocol', 1), ('allows', 1), ('GET', 1), ('requests', 1), ('extra', 1), ('form', 1), ('see', 1), ('easy', 1), ('practice', 1), ('First', 1), ('web', 1), ('added', 1), ('question', 1), ('mark', 1), ('indicates', 1), ('complete', 1), ('anything', 1), ('beyond', 1), ('additional', 1), ('location', 1), ('simple', 1), ('instantiate', 1), ('Theyre', 1), ('name', 1), ('equal', 1), ('sign', 1), ('followed', 1), ('value', 1), ('contains', 1), ('need', 1), ('add', 1), ('another', 1), ('separate', 1), ('ampersands', 1), ('ignores', 1), ('additions', 1), ('harvest', 1), ('next', 1), ('page', 1), ('little', 1), ('JavaScript', 1)]\n",
      "test 7 faketext word total in situ: 146\n",
      "test 7 faketext in situ: [('variables', 5), ('HTTP', 4), ('information', 4), ('data', 4), ('browser', 4), ('GET', 3), ('request', 3), ('URL', 3), ('JavaScript', 3), ('allows', 2), ('contain', 2), ('extra', 2), ('additional', 2), ('developers', 2), ('specific', 2), ('subsequent', 2), ('pages', 2), ('valuable', 2), ('user', 2), ('appended', 2), ('page', 2), ('code', 2), ('harvest', 2), ('seamless', 2), ('protocol', 1), ('requests', 1), ('form', 1), ('provide', 1), ('means', 1), ('transmit', 1), ('alongside', 1), ('appending', 1), ('keyvalue', 1), ('pairs', 1), ('include', 1), ('accessed', 1), ('utilized', 1), ('scripts', 1), ('feature', 1), ('proves', 1), ('especially', 1), ('comes', 1), ('passing', 1), ('inputs', 1), ('session', 1), ('identifiers', 1), ('relevant', 1), ('required', 1), ('processing', 1), ('personalization', 1), ('crafting', 1), ('seamlessly', 1), ('incorporates', 1), ('forming', 1), ('complete', 1), ('query', 1), ('reaches', 1), ('server', 1), ('interprets', 1), ('performs', 1), ('necessary', 1), ('actions', 1), ('accordingly', 1), ('However', 1), ('may', 1), ('seem', 1), ('like', 1), ('hidden', 1), ('treasure', 1), ('trove', 1), ('average', 1), ('remains', 1), ('inconspicuous', 1), ('cleverly', 1), ('designed', 1), ('lies', 1), ('wait', 1), ('ready', 1), ('extract', 1), ('concealed', 1), ('oblivious', 1), ('elements', 1), ('carries', 1), ('standard', 1), ('rendering', 1), ('execution', 1), ('processes', 1), ('carefully', 1), ('crafted', 1), ('though', 1), ('detects', 1), ('captures', 1), ('accessing', 1), ('utilizing', 1), ('process', 1), ('flow', 1), ('empowering', 1), ('create', 1), ('dynamic', 1), ('personalized', 1), ('web', 1), ('experiences', 1), ('simply', 1), ('ignores', 1), ('additions', 1), ('next', 1), ('little', 1)]\n"
     ]
    }
   ],
   "source": [
    "import string\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "\n",
    "stopWords = set(stopwords.words('english'))\n",
    "\n",
    "# One row per text variant of a test:\n",
    "# (input file suffix, label for the total, label for the listing, output file suffix).\n",
    "# The first two rows are the stand-alone original / generated passages, the last two\n",
    "# are the same passages in their surrounding context. Labels are kept verbatim so the\n",
    "# printed report keeps its existing wording.\n",
    "TEXT_VARIANTS = [\n",
    "    ('_mineplain.txt', ' originaltext word total: ', ' originaltext: ', 'distancetoo.txt'),\n",
    "    ('_chatplain.txt', ' faketext word total: ', ' faketext: ', 'distance.txt'),\n",
    "    ('_mine.txt', ' originaltext in situ word total: ', ' originaltext in situ: ', 'con_distancetoo.txt'),\n",
    "    ('_chat.txt', ' faketext word total in situ: ', ' faketext in situ: ', 'con_distance.txt'),\n",
    "]\n",
    "\n",
    "\n",
    "def load_content_words(path):\n",
    "    \"\"\"Return the word tokens of the file at ``path`` without punctuation or stop words.\n",
    "\n",
    "    Steps: strip ASCII punctuation, drop hyphenation markers, turn newlines into\n",
    "    spaces, tokenize with NLTK, then discard English stop words (case-insensitive).\n",
    "    Original capitalisation of the kept tokens is preserved.\n",
    "    \"\"\"\n",
    "    with open(path) as handle:  # context manager closes the file promptly\n",
    "        text = handle.read()\n",
    "    text = text.translate(str.maketrans('', '', string.punctuation))\n",
    "    # U+00AC (the not sign) appears in the middle of words in the sample texts\n",
    "    # (tokens like 'ency' + U+00AC + 'clopedic' showed up in earlier runs), which looks\n",
    "    # like a leftover line-break hyphenation marker. Depending on how the file was\n",
    "    # decoded it arrives either as the bare character or as the two-character\n",
    "    # mojibake U+00C2 U+00AC, so both spellings are handled. The marker is removed\n",
    "    # (not replaced by a space) so the two halves rejoin into one real word.\n",
    "    for marker in ('\\u00c2\\u00ac', '\\u00ac'):\n",
    "        text = text.replace(marker + '\\n', '').replace(marker, '')\n",
    "    text = text.replace('\\n', ' ')\n",
    "    return [w for w in nltk.word_tokenize(text) if w.lower() not in stopWords]\n",
    "\n",
    "\n",
    "for test_number in range(1, 8):\n",
    "    x = str(test_number)\n",
    "    for in_suffix, total_label, listing_label, out_suffix in TEXT_VARIANTS:\n",
    "        words = load_content_words('test' + x + in_suffix)\n",
    "        fdist = nltk.FreqDist(words)\n",
    "        print('test ' + x + total_label + str(fdist.N()))\n",
    "        print('test ' + x + listing_label + str(fdist.most_common(fdist.N())))\n",
    "        # Save the filtered tokens (each followed by one space) for the TF-IDF\n",
    "        # comparison cell, which reads these files back by name.\n",
    "        with open('test' + x + out_suffix, 'w') as out:\n",
    "            out.write(''.join(word + ' ' for word in words))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "42969fba-4d31-4275-8c32-2a52c9c146a5",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "56cf2402-ded3-4cf7-a772-55b4d1b339cf",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "text1 plain: 0.05995435322067342\n",
      "text1 full: 0.592669817626361\n",
      "text2 plain: 0.05563521284808852\n",
      "text2 full: 0.39725383794074937\n",
      "text3 plain: 0.012955381597404567\n",
      "text3 full: 0.6987382226157792\n",
      "text4 plain: 0.026672252441084822\n",
      "text4 full: 0.6728421521725613\n",
      "text5 plain: 0.07328770656672128\n",
      "text5 full: 0.4427393249191032\n",
      "text6 plain: 0.0775347474636377\n",
      "text6 full: 0.3406537999093696\n",
      "text7 plain: 0.0821415506852778\n",
      "text7 full: 0.28808105534278305\n"
     ]
    }
   ],
   "source": [
    "def read_text(path):\n",
    "    \"\"\"Return the whole contents of the text file at ``path``, closing it afterwards.\"\"\"\n",
    "    with open(path) as handle:\n",
    "        return handle.read()\n",
    "\n",
    "\n",
    "def tfidf_similarity(first, second):\n",
    "    \"\"\"Return the TF-IDF similarity score between two documents.\n",
    "\n",
    "    The vectorizer is fitted on just these two documents. With TfidfVectorizer's\n",
    "    default L2 row normalisation the dot product of the two rows is their cosine\n",
    "    similarity (1.0 = identical term profile, 0.0 = no shared terms), so larger\n",
    "    means more alike even though the output files are named 'distance'.\n",
    "    \"\"\"\n",
    "    tfidf = TfidfVectorizer(decode_error='ignore').fit_transform([first, second])\n",
    "    # '@' is always a matrix product; '*' is element-wise on scipy sparse arrays.\n",
    "    return (tfidf @ tfidf.T)[0, 1]\n",
    "\n",
    "\n",
    "for t in range(1, 8):\n",
    "    x = str(t)\n",
    "    # Stand-alone passages: generated tokens vs. original tokens.\n",
    "    distance = tfidf_similarity(\n",
    "        read_text('test' + x + 'distance.txt'),\n",
    "        read_text('test' + x + 'distancetoo.txt'),\n",
    "    )\n",
    "    # The same passages embedded in their surrounding context.\n",
    "    distancetoo = tfidf_similarity(\n",
    "        read_text('test' + x + 'con_distance.txt'),\n",
    "        read_text('test' + x + 'con_distancetoo.txt'),\n",
    "    )\n",
    "    print('text' + x + ' plain: ' + str(distance))\n",
    "    print('text' + x + ' full: ' + str(distancetoo))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d24d7cb2-4a74-48ed-994d-1dd8ca17afa0",
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1: 206\n",
      "2: 67\n",
      "3: 185\n",
      "4: 271\n",
      "5: 126\n",
      "6: 70\n",
      "7: 53\n"
     ]
    }
   ],
   "source": [
    "import nltk\n",
    "# Print the number of NLTK tokens (words and punctuation marks) in each test's query file.\n",
    "for test_number in range(1, 8):\n",
    "    x = str(test_number)\n",
    "    with open('test' + x + '_query.txt') as handle:  # closes the file promptly\n",
    "        query_text = handle.read()\n",
    "    # The token total is all that is reported, so a frequency table is not needed.\n",
    "    query_tokens = nltk.word_tokenize(query_text)\n",
    "    print(x + ': ' + str(len(query_tokens)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff92dcc4-23f2-418c-8d86-c3515d64cddd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
